# Install the required packages
!pip install scanpy
!pip install louvain
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
# Configure scanpy logging and figure defaults, then print the package versions in use.
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_versions()
sc.settings.set_figure_params(dpi=80, color_map='plasma')
# Connect to the Google Drive files: mount Drive under /content/drive.
from google.colab import drive
drive.mount('/content/drive')
# Load the AnnData object stored at save_file (an .h5ad file on the mounted Drive).
save_file = '/content/drive/My Drive/ClinCluster_DataIntegration/Notebook_Dataset2/scanpy_DR.h5ad'
adata = sc.read_h5ad(save_file)
# Bare expression: only displays the object when it ends a notebook cell; no effect in a plain script.
adata
# Louvain clustering at several resolutions; every run is stored in its own
# column, named after the resolution used.
louvain_runs = [
    ("louvain_1.0", {}),  # no explicit resolution: the default (1.0) applies
    ("louvain_0.6", {"resolution": 0.6}),
    ("louvain_0.4", {"resolution": 0.4}),
    ("louvain_1.4", {"resolution": 1.4}),
]
for obs_key, louvain_kwargs in louvain_runs:
    sc.tl.louvain(adata, key_added=obs_key, **louvain_kwargs)

# Plot the four clusterings on the UMAP embedding, ordered by resolution.
sc.pl.umap(
    adata,
    color=['louvain_0.4', 'louvain_0.6', 'louvain_1.0', 'louvain_1.4'],
)
# Rank genes for each 'louvain_0.6' cluster with the Wilcoxon test,
# keeping 1000 genes per group, and plot the top 25 of each group.
sc.tl.rank_genes_groups(
    adata,
    groupby='louvain_0.6',
    n_genes=1000,
    method='wilcoxon',
)
sc.pl.rank_genes_groups(adata, sharey=False, n_genes=25)

from google.colab import files

# Write an intermediate copy of the object, including the ranking results, to Drive.
adata.write(
    '/content/drive/My Drive/ClinCluster_DataIntegration/Notebook_Dataset2/clustering_transitional_results.h5ad'
)
print('Edited transitional file.')
# Save a .csv file with the marker gene names: one column per group,
# limited to the first 100 rows of the ranking.
adata = sc.read(
    "/content/drive/My Drive/ClinCluster_DataIntegration/Notebook_Dataset2/clustering_transitional_results.h5ad"
)
ranked_gene_names = adata.uns['rank_genes_groups']['names']
df = pd.DataFrame(ranked_gene_names).head(100)

from google.colab import files

df.to_csv(
    '/content/drive/My Drive/ClinCluster_DataIntegration/Notebook_Dataset2/marker_genes.csv'
)
print('Edited marker_genes.csv')
# Save a .csv file with, for every group, the marker gene names and their
# p-values (first 50 rows of the ranking).
# Columns are named '<group>_n' (gene names) and '<group>_p' (p-values): the
# suffix is the first letter of the rank_genes_groups field it is read from.
# NOTE(review): the file name mentions "scores" but the exported field is
# 'pvals' - confirm which one was intended.
result = adata.uns['rank_genes_groups']
groups = result['names'].dtype.names
df = pd.DataFrame(
    {
        group + '_' + key[:1]: result[key][group]
        for group in groups
        for key in ['names', 'pvals']
    }
).head(50)
from google.colab import files
# Written to Notebook_Dataset2 like every other file this script reads or
# writes; the path used to point at Notebook_Dataset1, which would have put
# this dataset's results into another dataset's folder.
df.to_csv('/content/drive/My Drive/ClinCluster_DataIntegration/Notebook_Dataset2/marker_genes_scores-groups.csv')
print('Edited marker_genes_scores-groups')
# Cell-type labels for the 'louvain_0.6' clusters. The list is positional:
# the n-th label replaces the n-th existing category, so the order matters.
new_cluster_names = [
    'Fibroblasts',
    'Secretory Cells',
    'Macrophages',
    'CD8T-Cells',
    'Neutrophils',
    'Ciliated Cells',
    'Lymphatic Endothelial Cells',
    'Smooth Muscle Cells',
    'Endothelial Cells',
    'B-Cells',
    'Mast Cells',
]
adata.rename_categories('louvain_0.6', new_cluster_names)

# UMAP coloured by the annotated clusters, with the legend in the right margin.
sc.pl.umap(
    adata,
    color='louvain_0.6',
    size=10,
    legend_loc='right margin',
    legend_fontsize='x-small',
    legend_fontweight="normal",
)

# Keep the annotated clustering under a second column name as well.
adata.obs['merging_clusters'] = adata.obs['louvain_0.6']
# Heatmaps to visualize marker genes, grouped by the 'louvain_0.6' clustering.
#
# The flags passed to sc.pl.heatmap are real booleans. They used to be the
# strings 'True', which only behaved as "on" because any non-empty string is
# truthy (the string 'False' would have switched the option on too).

# The 5 main genes of each cluster.
top5_marker_genes = [
    'DCN', 'CLU', 'HLA-DRA', 'B2M', 'S100A9', 'C1orf194', 'CCL21', 'TAGLN', 'TM4SF1',
    'JCHAIN', 'TPSB2', 'MGP', 'WFDC2', 'CD74', 'SRGN', 'S100A8', 'TXN', 'TFF3', 'ADIRF',
    'SELE', 'SSR4', 'TPSAB1', 'APOD', 'MMP7', 'CCL5', 'BCL2A1', 'CETN2', 'GNG11', 'ACTA2',
    'EMP1', 'IGKC', 'CPA3', 'LUM', 'CRISP3', 'HLA-DRB1', 'CXCR4', 'CXCL8', 'AGR3', 'SDPR',
    'MYL9', 'ACKR1', 'MZB1', 'CTSG', 'ADH1B', 'RPS12', 'HLA-DPA1', 'CREM', 'FAM183A', 'S100A10',
    'DSTN', 'IFI27', 'FKBP11',
]
sc.pl.heatmap(
    adata,
    top5_marker_genes,
    groupby="louvain_0.6",
    show_gene_labels=True,
    log=True,
)

# The 10 main genes of each cluster: the list above followed by the extra
# genes, in the same order as the original hand-written list.
marker_genes = top5_marker_genes + [
    'IGFBP5', 'MT-CO1', 'GPR183', 'ARHGDIB', 'FTH1', 'C20orf85', 'MMRN1',
    'TPM2', 'THBD', 'IGHA1', 'HPGDS', 'DPT', 'RPS6', 'IL1B', 'CD3D', 'SOD2', 'SCGB2A1',
    'LYVE1', 'PLN', 'HLA-E', 'SEC11C', 'RGS2', 'LGALS1', 'SLPI', 'HLA-DRB5', 'XCL1', 'LYZ',
    'CAPS', 'TFPI', 'MYH11', 'CAV1', 'HERPUD1', 'FOS', 'RPS19', 'HLA-DPB1', 'TMSB4X', 'TPPP3',
    'PCAT19', 'MYL6', 'DERL3', 'SPARCL1', 'ACTG1', 'HLA-DQA1', 'CD52', 'PSENEN', 'ATP5E', 'RERGL',
    'SOCS3', 'MS4A2',
]
sc.pl.heatmap(
    adata,
    marker_genes,
    groupby="louvain_0.6",
    show_gene_labels=True,
    log=True,
)
# Plot known marker genes on the UMAP, one panel group per cell type.
# NOTE(review): this recomputes the UMAP although an embedding was already
# plotted earlier in the script - confirm the recomputation is intended.
sc.tl.umap(adata)

# (printed heading, gene or genes to colour by), in plotting order.
marker_panels = [
    ('Secretory Markers', ['KRT7', 'OVGP1', 'PAX8']),
    ('Epithelial Markers', 'EPCAM'),
    ('Ciliated Markers', ['FOXJ1', 'CAPS']),
    ('Leukocyte Marker', ['PTPRC']),
    ('CD8T-cell marker', ['NKG7', 'CD8A', 'CD8B']),
    ('Fibroblast Marker', ['DCN', 'COL1A1']),
    ('Mast cell Markers', ['TPSAB1']),
    ('Macrophage markers', ['AIF1', 'LYZ']),
    ('Smooth muscle cell markers', ['TAGLN', 'MYL9']),
    ('Endothelial cell markers', ['SELE']),
    ('B-Cells markers', ['JCHAIN', 'IGKC', 'CD79A', 'CD79B', 'IGHG3']),
]
for panel_title, panel_genes in marker_panels:
    print(panel_title)
    # use_raw=False: colour by the values in adata.X rather than adata.raw.
    sc.pl.umap(adata, color=panel_genes, size=10, use_raw=False)
# Write the final AnnData object to Drive as an .h5ad file and confirm on stdout.
from google.colab import files
save_file = '/content/drive/My Drive/ClinCluster_DataIntegration/Notebook_Dataset2/scanpy_clustering.h5ad'
adata.write_h5ad(save_file)
print("Edited.")